## Replication File 2 of 2
## Uses prepped data LL2024.csv output by 
## LarsonLewis2024_1_PrepData.R
## "Reducing Prejudice Towards Refugees in Uganda: 
## Evidence that Social Networks Influence Attitudes"
## Jennifer M. Larson and Janet I. Lewis
## Created: Feb 2, 2022
## Updated: Feb 23, 2024

## Analyses performed in R version 4.3.1 (2023-06-16) -- "Beagle Scouts"

#install.packages("tidyverse")
#install.packages("igraph")
#install.packages("xtable")
#install.packages("stargazer")
#install.packages("plyr")

#library(plyr) #Don't load immediately, masks a needed function.  Load at line 1525 as indicated below.
library(tidyverse)
library(igraph)
library(xtable)
library(stargazer)



Data <- read_csv("LL2024.csv") #521x145



##############################################################################
##############################################################################
## Analyses for Main Paper
##############################################################################
##############################################################################


####
## Table 1
####

## Create four separate village datasets
v1 <- Data[which(Data$village_number == 1),]
v2 <- Data[which(Data$village_number == 2),]
v3 <- Data[which(Data$village_number == 3),]
v4 <- Data[which(Data$village_number == 4),]

datalist <- list(v1, v2, v3, v4, Data)
datalistnames <- c("Vlg 1", "Vlg 2", "Vlg 3", "Vlg 4", "All")
outmat <- matrix(data = NA, nrow = 14, ncol = length(datalist))

for (i in 1:length(datalist)){
  thedata <- datalist[[i]]
  outmat[1,i] <- mean(thedata$village_number)
  outmat[2,i] <- round(mean(thedata$resp_age), 0)
  outmat[3,i] <- round(mean(thedata$religion == "Protestant"),2)
  outmat[4,i] <- round(mean(thedata$religion == "Catholic"),2)
  outmat[5,i] <- round(mean(thedata$religion == "Muslim"),2)
  outmat[6,i] <- round(mean(thedata$farmer == 1),2)
  outmat[7,i] <- round(mean(thedata$resp_occupation == "Trader"),2)
  outmat[8,i] <- mean(thedata$resp_educ_coarse == "None")
  outmat[9,i] <- mean(thedata$resp_educ_coarse == "SomePrimary")
  outmat[10,i] <- mean(thedata$resp_educ_coarse == "SomeSecondary")
  outmat[11,i] <- mean(thedata$resp_educ_coarse == "SomeCollege")
  outmat[12,i] <- round(mean(thedata$years_live == "More than 5 years" ),2)
  outmat[13,i] <- nrow(thedata)
  outmat[14,i] <- sum(thedata$in_el == 1)
  rm(thedata)
}

colnames(outmat) <- datalistnames
rownames(outmat) <- c("Vlg Number", "Age", "Protestant", "Catholic", "Muslim", 
                      "Farmer", "Trader", "No Educ", "Primary Educ", "Secondary Educ", "College Educ", "Lived > 5yrs",
                      "Baseline hhs","Endline hhs")

## Make a matrix of digits to retain, round everything larger than 1 but keep 2 digits if <1
digits <- matrix(data = NA, nrow = nrow(outmat), ncol = ncol(outmat))
for(i in 1:nrow(outmat)){
  for(j in 1:ncol(outmat)){
    digits[i,j] <- ifelse(outmat[i,j] >= 1, 0, 2) #round everything >= 1, but keep 2 digits if <1
  }
}

digits <- cbind(rep(0, nrow(outmat)), digits)
xtable(outmat, align = "lrrrr|r", digits = digits)

rm(outmat)
rm(digits)



####
## Table 2
####

## Refugee experience:

outmat <- matrix(data = NA, nrow = 3, ncol = length(datalist))

for (i in 1:length(datalist)){
  thedata <- datalist[[i]]
  outmat[1,i] <- mean(thedata$village_number)
  outmat[2,i] <- mean(thedata$been_refugee == "Yes")
  outmat[3,i] <- mean(thedata$refugee_meet == "Yes")
  rm(thedata)
}

colnames(outmat) <- datalistnames
rownames(outmat) <- c("Vil Num", "Has been refugee", "Has met refugee")

## FIRST TWO ROWS OF TABLE 2:
xtable(outmat, align = "lrrrr|r", digits = 2)

rm(outmat)
rm(datalist)

## Hearing about refugees

makeatablecomref <- function(datalist, datalistnames){
  
  #Make an empty matrix with a col for each dataset, row for each variable
  outmat <- matrix(data = NA, nrow = 7, ncol = length(datalist))
  
  for (i in 1:length(datalist)){
    thedata <- datalist[[i]]
    outmat[1,i] <- mean(thedata$village_number)
    outmat[2,i] <- mean(thedata$from_hhmem +
                          thedata$from_nonhhmem +
                          thedata$from_friend +
                          thedata$from_newspaper +
                          thedata$from_radio +
                          thedata$from_tv +
                          thedata$from_other)
    outmat[3,i] <- (sum(thedata$from_hhmem > 0) +
                      sum(thedata$from_nonhhmem > 0) +
                      sum(thedata$from_friend > 0)) / nrow(thedata)
    outmat[4,i] <- sum(thedata$from_radio > 0) / nrow(thedata)
    outmat[5,i] <- sum(thedata$from_newspaper > 0) / nrow(thedata)
    outmat[6,i] <- sum(thedata$from_tv > 0) / nrow(thedata)
    outmat[7,i] <- sum(thedata$from_other > 0) / nrow(thedata)
    
  }
  
  colnames(outmat) <- datalistnames
  rownames(outmat) <- c("Vlg Number", 
                        "Num times came up last week", "Heard from friend or family", 
                        "Heard from radio", "Heard from newspaper", 
                        "Heard from TV", "Heard from other")
  
  
  return(xtable(outmat, digits = 2))
  
}

## BOTTOM 7 ROWS OF TABLE 2

makeatablecomref(datalist = list(v1, v2, v3, v4, Data),
                 datalistnames = c("Vlg 1", "Vlg 2", "Vlg 3", "Vlg 4", "All"))

rm(makeatablecomref)
rm(datalistnames)



####
## Figure 3
####

## Separate dataset by villages
v1 <- Data[which(Data$village_number == 1),]
v2 <- Data[which(Data$village_number == 2),]
v3 <- Data[which(Data$village_number == 3),]
v4 <- Data[which(Data$village_number == 4),]

## Let's make indices for treated and control in each
t1 <- which(v1$treatment == 1)
t2 <- which(v2$treatment == 1)
t3 <- which(v3$treatment == 1)
t4 <- which(v4$treatment == 1)

c1 <- which(v1$treatment == 0)
c2 <- which(v2$treatment == 0)
c3 <- which(v3$treatment == 0)
c4 <- which(v4$treatment == 0)

## And treatment and control indices for the full dataset
t <- which(Data$treatment == 1)
c <- which(Data$treatment == 0)



## Construct a ggplot dataframe for separate attitude indices, baseline 1, 2, and endline

makehistdf <- function(data, rowind= c(1:nrow(data))){
  data <- data[rowind,]
  
  df <- data.frame(Count0 = c(table(factor(data$noprob1, levels = 1:5)), 
                              table(factor(data$fit1, levels = 1:5)),
                              table(factor(data$burden1, levels = 1:5)),
                              table(factor(data$values1, levels = 1:5)),
                              table(factor(data$land1, levels = 1:5)),
                              table(factor(data$threat1, levels = 1:5))),
                   Percent0 = c(table(factor(data$noprob1, levels = 1:5))/nrow(data),
                                table(factor(data$fit1, levels = 1:5))/nrow(data),
                                table(factor(data$burden1, levels  = 1:5))/nrow(data),
                                table(factor(data$values1, levels = 1:5))/nrow(data),
                                table(factor(data$land1, levels = 1:5))/nrow(data),
                                table(factor(data$threat1, levels = 1:5))/nrow(data)),
                   Count1 = c(table(factor(data$noprob2, levels = 1:5)),
                              table(factor(data$fit2, levels = 1:5)),
                              table(factor(data$burden2, levels = 1:5)),
                              table(factor(data$values2, levels = 1:5)),
                              table(factor(data$land2, levels = 1:5)),
                              table(factor(data$threat2, levels = 1:5))),
                   Percent1 = c(table(factor(data$noprob2, levels = 1:5))/nrow(data),
                                table(factor(data$fit2, levels = 1:5))/nrow(data),
                                table(factor(data$burden2, levels = 1:5))/nrow(data),
                                table(factor(data$values2, levels = 1:5))/nrow(data),
                                table(factor(data$land2, levels = 1:5))/nrow(data),
                                table(factor(data$threat2, levels = 1:5))/nrow(data)),
                   Count2 = c(table(factor(data$noprob_e, levels = 1:5)),
                              table(factor(data$fit_e, levels = 1:5)),
                              table(factor(data$burden_e, levels = 1:5)),
                              table(factor(data$values_e, levels = 1:5)),
                              table(factor(data$land_e, levels = 1:5)),
                              table(factor(data$threat_e, levels = 1:5))),
                   Percent2 = c(table(factor(data$noprob_e, levels = 1:5))/nrow(data),
                                table(factor(data$fit_e, levels = 1:5))/nrow(data),
                                table(factor(data$burden_e, levels = 1:5))/nrow(data),
                                table(factor(data$values_e, levels = 1:5))/nrow(data),
                                table(factor(data$land_e, levels = 1:5))/nrow(data),
                                table(factor(data$threat_e, levels = 1:5))/nrow(data)),
                   Question = c(rep("NoProb", 5),
                                rep("Fit", 5),
                                rep("Burden", 5),
                                rep("Values", 5),
                                rep("Land", 5),
                                rep("Threat", 5)),
                   Score = c(1:5,
                             1:5,
                             1:5,
                             1:5,
                             1:5,
                             1:5))
  out <- df
  return(out)
  
}


## Make baseline attitude histograms for baseline for everyone in the data

dfpooled <- makehistdf(Data) #all data

atthist0 <- function(ggdf, title = "Baseline Attitudes"){ 
  
  p <- ggplot(data=ggdf, aes_string(x = "Score", y= "Percent0", fill = "Question"))+ #using string lets feed yvar
    geom_bar(position = "dodge", stat="identity")+
    scale_fill_brewer(palette = "Accent")+
    ylim(0,1)+
    labs(title = title, y="")+
    theme_minimal()
  #  theme(legend.position = "none")
  out <- p
  return(out)
}

## PAPER FIGURE 3
## Save as All_atts_b1_all.pdf 4x7
atthist0(dfpooled, "Baseline, Everyone")

rm(dfpooled)
rm(makehistdf)
rm(atthist0)


####
## Table 3
####

## Make a table of average attitude scores at each data collection stage

makeatabletrtatts <- function(datalist, datalistnames){
  
  #Make an empty matrix with a col for each dataset, row for each variable
  outmat <- matrix(data = NA, nrow = 13, ncol = length(datalist))
  
  for (i in 1:length(datalist)){
    thedata <- datalist[[i]]
    outmat[1,i] <- mean(thedata$village_number)
    outmat[2,i] <- mean(thedata$proref1)
    outmat[3,i] <- mean(thedata$proref2[thedata$treatment == 1])
    outmat[4,i] <- mean(thedata$proref_e[thedata$in_el==1])
    outmat[5,i] <- mean(thedata$proref_st[thedata$treatment == 1])
    outmat[6,i] <- mean(thedata$proref_lt[thedata$in_el == 1])
    outmat[7,i] <- sum(thedata$proref_st[thedata$treatment == 1] == 0) / length(thedata$proref_st[thedata$treatment == 1])
    outmat[8,i] <- sum(thedata$proref_st[thedata$treatment == 1] > 0) / length(thedata$proref_st[thedata$treatment == 1])
    outmat[9,i] <- sum(thedata$proref_st[thedata$treatment == 1] < 0) / length(thedata$proref_st[thedata$treatment == 1])
    outmat[10,i] <- sum(thedata$proref_lt[thedata$in_el == 1] == 0) / length(thedata$proref_lt[thedata$in_el == 1])
    outmat[11,i] <- sum(thedata$proref_lt[thedata$in_el == 1] > 0) / length(thedata$proref_lt[thedata$in_el == 1])
    outmat[12,i] <- sum(thedata$proref_lt[thedata$in_el == 1] < 0) / length(thedata$proref_lt[thedata$in_el == 1])
    outmat[13,i] <- nrow(thedata)
  }
  
  colnames(outmat) <- datalistnames
  rownames(outmat) <- c("Vlg Number", "Pro-ref score, bl", "Pro-ref score, bl2", 
                        "Pro-ref score, el", "Short-term change", 
                        "Long-term change", "Prop s.t. change 0", 
                        "Prop s.t. change > 0", "Prop s.t. change < 0",
                        "Prop l.t. change 0", "Prop l.t. change > 0", 
                        "Prop l.t. change < 0", "n")
  
  dig <- matrix(2, ncol = length(datalist) + 1, nrow = 13)
  dig[1:6,] <- 1
  dig[13,] <- 0
  
  
  return(xtable(outmat, digits = dig))
  
}

## Table by village of refugee score and response to treatment

## Info for Paper Table 3 (only kept some rows for paper)

makeatabletrtatts(datalist = list(v1[t1,], v2[t2,], v3[t3,], v4[t4,], Data[t,]),
                  datalistnames = c("Vlg 1", "Vlg 2", "Vlg 3", "Vlg 4", "All"))

rm(makeatabletrtatts)



####
## Figure 4
####

## Make a figure to show the response to treatment.
## First, make a dataset that collects proref1, proref2, and proref_e as waves for the treated

wave1 <- Data[t, c("village_number", "proref1")]
names(wave1) <- c("village_number", "score")
wave1$wave <- "Baseline"
wave2 <- Data[t, c("village_number", "proref2")]
names(wave2) <- c("village_number", "score")
wave2$wave <- "Baseline 2"
wave3 <- Data[t, c("village_number", "proref_e")]
names(wave3) <- c("village_number", "score")
wave3$wave <- "Endline"

t_waves <- rbind(wave1, wave2, wave3)

pld <- t_waves
pld$village_number <- "Pooled"

t_tot <- rbind(t_waves, pld)

rm(wave1)
rm(wave2)
rm(wave3)
rm(pld)
rm(t_waves)

## First create a count of each category
t_tot_1 <- t_tot %>% group_by(village_number, wave) %>% summarize(N = n()) 

## Now summarize each category
t_tot_2 <- t_tot %>% group_by(village_number, wave) %>%
  summarize_all(list(mean, sd), na.rm=T) %>%
  ungroup()

## And add the count column to this
names(t_tot_2) <- c("village_number", "wave", "score_mean", "score_sd")
t_tot_2$count <- t_tot_1$N

## Now add a standard error column, calculated by standard deviation / sqrt n
t_tot_2 <- t_tot_2 %>% mutate(se = score_sd / sqrt(count))

## Let's prepare to add stars to indicate the result of 2-sided t-tests comparing
## the baseline and the endline

## First, add p-values from t-tests.  Manually make vector of these in order
## of village, and then bl1, bl2, el, with pooled at end (same order as t_tot_2)
pvals <- rep(NA, nrow(t_tot_2))
pvals[2] <- (t.test(v1$proref1[t1], v1$proref2[t1]))$p.value #21.4 v. 23.3, p = .02)
pvals[3] <- (t.test(v1$proref1[t1], v1$proref_e[t1]))$p.value #21.3 v. 22.2, p = .3

pvals[5] <-(t.test(v2$proref1[t2], v2$proref2[t2]))$p.value #20.0 v. 23.3, p < .001
pvals[6] <- (t.test(v2$proref1[t2], v2$proref_e[t2]))$p.value #20.0 v. 21.6, p = .07

pvals[8] <- (t.test(v3$proref1[t3], v3$proref2[t3]))$p.value #24.3 v. 26.7, p < .001
pvals[9] <- (t.test(v3$proref1[t3], v3$proref_e[t3]))$p.value #24.3 v. 25.6, p = .08

pvals[11] <- (t.test(v4$proref1[t4], v4$proref2[t4]))$p.value #23.3 v. 25.8, p < .001
pvals[12] <- (t.test(v4$proref1[t4], v4$proref_e[t4]))$p.value #23.3 v. 24.1, p = .22)

pvals[14] <- (t.test(Data$proref1[t], Data$proref2[t]))$p.value #22.6 v. 25.1, very significant increase in attitude index pooled
pvals[15] <- (t.test(Data$proref1[t], Data$proref_e[t]))$p.value #22.7 v. 23.8, significant increase in attitude index pooled)


t_tot_2$pvals <- pvals
rm(pvals)

## And now let's add character representation

stars <- rep("", nrow(t_tot_2))
for(i in 1:length(t_tot_2$pvals)){
  if(is.na(t_tot_2$pvals[i])) next
  if(t_tot_2$pvals[i] > .1){
    stars[i] <- ""
  }
  if(.05 < t_tot_2$pvals[i] & t_tot_2$pvals[i] <= .1){
    stars[i] <- "-"
  }
  if(.01 < t_tot_2$pvals[i] & t_tot_2$pvals[i] <= .05){
    stars[i] <- "*"
  }
  if(.001 < t_tot_2$pvals[i] & t_tot_2$pvals[i] <= .01){
    stars[i] <- "**"
  }
  if(t_tot_2$pvals[i] < .001){
    stars[i] <- "***"
  }
}

t_tot_2$stars <- stars
rm(stars)


## Now make a plot that adds these stars above the relevant line segments

## PAPER FIGURE 4
## Save as MeanAttChange_wstars.pdf, 4x5
## This one is probably more informative, shows spread of single mean (one standard error for each)
p <- ggplot(t_tot_2, mapping = aes(x = village_number, y = score_mean, color = wave,
                                   ymin = score_mean-1*se, ymax = score_mean+1*se))
p+geom_pointrange(position = position_dodge(width = .1)) +
  geom_text(mapping = aes(label = stars), nudge_x = .15) +
  scale_fill_manual(values = c("grey10", "grey90")) +
  labs(x = "Village", 
       y = "Mean Attitude Score of the Treated",
       color = "Survey Wave") +
  ylim(19,29) +
  coord_flip()





#####
## Figure 5
####


makechangedf <- function(data, rowind= c(1:nrow(data))){ #default to using all rows in data
  data <- data[rowind,]
  
  df <- data.frame(Change = c(data$proref_st, data$proref_lt), 
                   Timeframe=c(rep("short-term", length(data$proref_st)),
                               rep("long-term", length(data$proref_lt))),
                   Village = c(as.character(data$village_number), as.character(data$village_number)))
  
  out <- df
  return(out)
}


attchangehist<- function(ggdf, title = "Attitude Change"){
  p <- ggplot(ggdf, aes(x = Change, fill = Timeframe)) + geom_histogram(position="identity", alpha = .3, bins = 20)+
    scale_fill_brewer(palette = "Accent")+
    geom_vline(xintercept = mean(ggdf$Change[ggdf$Timeframe=="short-term"], na.rm=T), color = "plum3")+
    geom_vline(xintercept = mean(ggdf$Change[ggdf$Timeframe=="long-term"], na.rm=T), color = "darkseagreen4")+
    theme_minimal()+
    labs(title=title,x="Change in Score", y="", fill = "") +
    theme(legend.position="top") +
    xlim(-22,22)
  
  out <- p
  return(out)
}

## Although the plotting function will correctly drop respondents that are not
## in the endline, it will provide a warning.  We can use an index for
## treated respondents who remained in the endline.

tel <- which(Data$treatment == 1 & Data$in_el == 1)

dfpooledchange <- makechangedf(Data, tel)

## PAPER FIGURE 5:
#save as 3.5x4 pdf, All_attchange.pdf
attchangehist(dfpooledchange, "All Villages") #warning fine, from NAs from people not in endline

rm(dfpooledchange)
rm(attchangehist)
rm(makechangedf)
rm(p)
rm(t_tot, t_tot_1, t_tot_2)

####
## Figure 6
####

tinel <- which(Data$treatment == 1 & Data$in_el == 1)

df_poly <- data.frame(
  x_atten=c(0, Inf, Inf),
  y_atten=c(0, Inf, 0),
  x=c(-Inf, Inf, -Inf),
  y=c(-Inf, Inf, Inf),
  x_accel=c(0, Inf, 0),
  y_accel=c(0, Inf, Inf),
  x_negaccel=c(-Inf,0,0),
  y_negaccel=c(-Inf,0,-Inf),
  x_negatten=c(-Inf,0,-Inf),
  y_negatten=c(-Inf,0,0)
)

## PAPER FIGURE 6
## save as st_v_lt_change_HIGHLIGHTED.pdf 6x7
p <- ggplot(data = Data[tinel,], mapping = aes(x = proref_st, y = proref_lt))
p + geom_jitter(aes(color = as.character(village_number)))+
  xlim(-21, 21) +
  ylim(-21,21) +
  geom_vline(xintercept = 0, alpha = .5, linetype = "dashed") +
  geom_hline(yintercept = 0, alpha = .5, linetype = "dashed") +
  geom_abline(slope = 1, alpha = .5, linetype = "dashed") +
  geom_polygon(data = df_poly, aes(x=x_atten, y=y_atten), fill="yellow", alpha = .2) +
  #annotate("rect", fill = "red", alpha = .2, xmin = 0, xmax = Inf, ymin = 0, ymax = -Inf) + 
  #annotate("rect", fill = "red", alpha = .2, xmin = 0, xmax = -Inf, ymin = 0, ymax = Inf) + 
  labs(x = "Short-term response to treatment relative to baseline", 
       y = "Long-term response to treatment relative to baseline",
       title = "Change in Attitudes of the Treated",
       color = "Village")+
  theme_minimal() 

rm(df_poly, p, tinel)


####
## Figure 7
####

## Next, plot baseline and endline for t and c, separated by village and pooled
## First make the right dataset


wave1 <- Data[, c("village_number", "treatment", "proref1")]
names(wave1) <- c("village_number", "treatment", "score")
wave1$wave <- "Baseline"
wave2 <- Data[, c("village_number", "treatment", "proref_e")]
names(wave2) <- c("village_number", "treatment", "score")
wave2$wave <- "Endline"

t_waves <- rbind(wave1, wave2)

## Add data again for pooled, call village_number "Pooled"
pld <- t_waves
pld$village_number <- "Pooled"

t_tot <- rbind(t_waves, pld)

rm(t_waves)
rm(wave1)
rm(wave2)
## Now add counts, and mean and sd of score

## First create a count of each category
t_tot_1 <- t_tot %>% group_by(village_number, wave, treatment) %>%
  summarize(N = n()) 

## Now summarize each category
all_tot <- t_tot %>% group_by(village_number, wave, treatment) %>%
  summarize_at(.vars = "score", list(mean, sd), na.rm=T) %>%
  ungroup()

## And add the count column to this
names(all_tot) <- c("village_number", "wave", "treatment", "score_mean", "score_sd")
all_tot$count <- t_tot_1$N

## Now add a standard error column, calculated by standard deviation / sqrt n
all_tot <- all_tot %>% mutate(se = score_sd / sqrt(count))

## Finally, rename the treatment indicator to be informative

newtrt <- all_tot$treatment
newtrt <- ifelse(newtrt == 1, "Treated", "Control")

all_tot$trt_named <- newtrt

## Now let's plot the baseline and endline score by treatment condition
## for all villages as well as the pooled, and add the standard error bars
## for the pooled


## Let's manually make a dataframe to be used in the correct line segments.  Collect each village/trt group's 
## baseline mean score and their endline mean score in the same row

thebls <- which(all_tot$wave == "Baseline")
theels <- which(all_tot$wave == "Endline")

segs <- cbind(all_tot[thebls,], all_tot[theels,])
dim(segs) #10x16, second eight are duplicate names
names(segs)
names(segs)[9:16] <- c("village_number2", "wave2","treatment2",  "score_mean2", "score_sd2", "count2", "se2", "trt_named2")

#segs

## PAPER FIGURE 7:
## Save as TvC_meanchange.pdf, 4x5

p <- ggplot(data = all_tot, mapping = aes(x = wave, y = score_mean, color = village_number, 
                                          ymin = score_mean-se, ymax = score_mean+se))
p  + geom_errorbar(data = subset(all_tot, village_number %in% c("Pooled")), position = "identity", width = .15) +
  geom_point(data = subset(all_tot, village_number %in% c("1", "2", "3", "4")), 
             aes(x = wave, y = score_mean)) +
  geom_point(data = subset(all_tot, village_number %in% c("Pooled")), 
             aes(x = wave, y = score_mean), size = 2) +
  geom_segment(data = subset(segs, village_number %in% c("1", "2", "3", "4")), 
               mapping = aes(x = wave, xend = wave2, y = score_mean, yend = score_mean2),
               alpha = .3) +
  geom_segment(data = subset(segs, village_number %in% c("Pooled")), 
               mapping = aes(x = wave, xend = wave2, y = score_mean, yend = score_mean2),
               alpha = 1, size = 1.5) +
  facet_wrap(~trt_named)+
  labs(x = "",
       y = "Mean score",
       color = "Village"
  )+
  ylim(17,28)+
  theme_minimal()


## Tidy
rm(all_tot)
rm(pld)
rm(segs)
rm(t_tot)
rm(t_tot_1)
rm(newtrt)
rm(thebls)
rm(theels)
rm(p)




#### 
## Figure 8
####




## Figure 8 made with output from LarsonLewis2024_1_PrepData.R 
## with the network visualization software Gephi
## See lines 862-944 of LarsonLewis2024_1_PrepData.R for 
## instructions


##########################################
## Network Similarity
##########################################


nwsimilarity <- function(nw){
  absdifavg1 <- c()
  absdifavg_e <- c()
  uniqueneighbsize <- c()
  
  for (i in 1:vcount(nw)){
    # All neighbors:
    neighbors <- neighbors(nw, v = i, mode = "all") #neighbors does not include ego, does include in and out
    uniqueneighb <- unique(neighbors)
    uniqueneighbsize[i] <- length(uniqueneighb) #number of network neighbors
    absdifs <-  abs(V(nw)$proref1[i] - V(nw)$proref1[uniqueneighb])
    absdifavg1[i] <- ifelse(uniqueneighbsize[i] != 0, sum(absdifs)/uniqueneighbsize[i], NA)
    absdifs_e <- abs(V(nw)$proref_e[i] - V(nw)$proref_e[uniqueneighb])
    eldenom <- sum(!is.na(absdifs_e))
    absdifavg_e[i] <- ifelse(eldenom != 0, sum(absdifs_e, na.rm=T)/eldenom, NA)
  }
  
  nwclusters <- cbind(V(nw)$hhnum, uniqueneighbsize, absdifavg1, absdifavg_e)
  nwclusters <- as.data.frame(nwclusters)
  
  return(nwclusters)
  
}

test <- nwsimilarity(g1)
rm(test)

## Now generate these for all the networks in all the villages and report the summary stat,
## and also compare treated and control

nwlist <- list(g1, g2, g3, g4)
nwlistnames <- c("V1 Union", "V2 Union", "V3 Union", "V4 Union")
outmat <- matrix(data = NA, nrow = 7, ncol = length(nwlist))

for (i in 1:length(nwlist)){
  thenw <- nwlist[[i]]
  clust <- nwsimilarity(thenw)
  outmat[1,i] <- i
  outmat[2,i] <- mean(clust$absdifavg1, na.rm=T)
  outmat[3,i] <- mean(clust$absdifavg_e, na.rm=T)
  trt <- which(V(thenw)$treatment ==1)
  cnt <- which(V(thenw)$treatment == 0)
  outmat[4,i] <- mean(clust$absdifavg1[trt], na.rm = T)
  outmat[5,i] <- mean(clust$absdifavg_e[trt], na.rm = T)
  outmat[6,i] <- mean(clust$absdifavg1[cnt], na.rm=T)
  outmat[7,i] <- mean(clust$absdifavg_e[cnt], na.rm=T)
  rm(clust)
  rm(thenw)
}

colnames(outmat) <- nwlistnames

rownames(outmat) <- c("Vlg Number", "AbsDifAvg1", "AbsDifAvg_e", "AbsDifAvg1TRT", 
                      "AbsDifAvg_eTRT", "AbsDifAvg1CNT", "AbsDifAvg_eCNT")

## PAPER TABLE 4
xtable(outmat, align = "lrrrr", digits = 2)

rm( nwlist, nwlistnames, outmat)



##########################################
## Regression Analysis
#########################################

## Now, regress endline scores on treatment, neighborhood treatment variables,
## and network distance variables


m1 <-  lm(proref_e ~ treatment + someneighbtreat + treatment*someneighbtreat +
                 neighbsize + neighbsize*treatment + proref1 + neighbproref +
                 neighbproref*treatment, data = Data)

m2 <- lm(proref_e ~ treatment + someneighbtreat +treatment*someneighbtreat +
                neighbsize + neighbsize*treatment + proref1 + neighbproref +
                neighbproref*treatment + disttomostpos  , data = Data)

m3 <- lm(proref_e ~ treatment + someneighbtreat +treatment*someneighbtreat +
                 neighbsize + neighbsize*treatment + proref1 + neighbproref +
                 neighbproref*treatment + disttomostneg  , data = Data)

m4 <- lm(proref_e ~ treatment + someneighbtreat +treatment*someneighbtreat +
                 neighbsize + neighbsize*treatment + proref1 + neighbproref +
                 neighbproref*treatment + disttopersuaded , data = Data)

m5 <- lm(proref_e ~ treatment + someneighbtreat +treatment*someneighbtreat +
                 neighbsize + neighbsize*treatment + proref1 + neighbproref +
                 neighbproref*treatment + disttocranky2 , data = Data)


## Include all distance variables
m6 <- lm(proref_e ~ treatment + someneighbtreat +treatment*someneighbtreat +
                 neighbsize + neighbsize*treatment + proref1 + neighbproref +
                 neighbproref*treatment + disttomostpos + 
                 disttomostneg + disttopersuaded + disttocranky2 , data = Data)

## PAPER TABLE 5
#tab:distanceregs
stargazer(m1, m2, m3, m4, m5, m6,
          keep.stat = c("n", "rsq"),
          single.row = FALSE)




## The reference nodes in the distance variables (the ones with distance 0):
table(Data$disttomostpos) #33 reference nodes
table(Data$disttomostneg) #14 reference nodes
table(Data$disttopersuaded) #16 reference nodes
table(Data$disttocranky2) #15 reference nodes




## Now run the placebo test for this specification for the treated, using
## baseline2 as the outcome instead of the endline scores, and using
## the indicators for being the reference nodes

m6t_refs_placebo <- lm(proref2 ~ someneighbtreat  +
                         neighbsize +  proref1 + neighbproref +
                         mostpos + mostneg + mostpersuaded + mostcranky+
                         disttomostpos +disttomostneg +
                         disttopersuaded + disttocranky2, data = Data[t,])

## PAPER TABLE 6
## tab:placebo

#stargazer(m6t_refs_placebo,
# keep.stat = c("n", "rsq"))
## New format, single line
stargazer(m6t_refs_placebo, single.row = TRUE,
          keep.stat = c("n", "rsq"))




rm(m1, m2, m3, m4, m5, m6, m6t_refs_placebo)















#######################
## Qualitative Validation
######################


makeatableposttrtcom <- function(datalist, datalistnames){
  
  #Make an empty matrix with a col for each dataset, row for each variable
  outmat <- matrix(data = NA, nrow = 6, ncol = length(datalist))
  
  for (i in 1:length(datalist)){
    thedata <- datalist[[i]]
    outmat[1,i] <- mean(thedata$village_number)
    outmat[2,i] <- mean(thedata$converse[thedata$in_el==1]=="Yes")
    outmat[3,i] <- sum(thedata$ref_topic[thedata$converse == "Yes" & thedata$in_el ==1] == "more often than usual")/
      length(thedata$ref_topic[thedata$converse == "Yes" & thedata$in_el ==1])
    outmat[4,i] <- sum(thedata$news_nature[thedata$converse == "Yes" & thedata$in_el ==1] == "All positive" | thedata$news_nature[thedata$converse == "Yes" & thedata$in_el ==1] == "Mostly positive")/
      length(thedata$news_nature[thedata$converse == "Yes" & thedata$in_el ==1])
    outmat[5,i] <- sum(thedata$nature_topic[thedata$converse == "Yes" & thedata$in_el ==1] == "All support" | thedata$nature_topic[thedata$converse == "Yes" & thedata$in_el ==1]=="Mostly support")/
      length(thedata$nature_topic[thedata$converse == "Yes" & thedata$in_el ==1])
    outmat[6,i] <- length(thedata$village_number[thedata$converse == "Yes" & thedata$in_el == 1])
    
  }
  
  colnames(outmat) <- datalistnames
  rownames(outmat) <- c("Vlg Number", "Had Convo about Refugees",
                        "Came Up More Often","Mostly Positive", "Mostly Supportive",
                        "# who had convo")
  dig <- matrix(2, ncol = length(datalist) + 1, nrow = 6)
  dig[6,] <- 0
  
  return(xtable(outmat, digits = dig))
  
}

## PAPER TABLE 7

makeatableposttrtcom(datalist = list(v1[t1,], v1[c1,], v2[t2,], v2[c2,], v3[t3,], v3[c3,], v4[t4,], v4[c4,], Data[t,], Data[c,]), 
                     datalistnames = c("V1 T", "V1 C", "V2 T", "V2 C", "V3 T", "V3 C", "V4 T", "V4 C", "All T", "All C"))




## Add column pooling totals (far right column of Table 7):

makeatableposttrtcom(datalist=list(Data), datalistnames = c("All"))

rm(makeatableposttrtcom)




### Paper Table 8
## Network similarity using the spokeref network (uses the nwsimilarity function
## built above)

nwlist <- list(v1n$spokeref, v2n$spokeref, v3n$spokeref, v4n$spokeref)
nwlistnames <- c("V1", "V2", "V3", "V4")
outmat <- matrix(data = NA, nrow = 3, ncol = length(nwlist))

for (i in 1:length(nwlist)){
  thenw <- nwlist[[i]]
  clust <- nwsimilarity(thenw)
  outmat[1,i] <- i
  outmat[2,i] <- mean(clust$absdifavg1, na.rm=T)
  outmat[3,i] <- mean(clust$absdifavg_e, na.rm=T)
  
  rm(clust)
  rm(thenw)
}



colnames(outmat) <- nwlistnames

rownames(outmat) <- c("Vlg Number", "Refugee Convo Difference, Baseline",
                      "Refugee Convo Difference, Endline") 


xtable(outmat, align = "lrrrr", digits = 2)








#####################################################################
#####################################################################
## SUPPORTING INFORMATION ANALYSES
####################################################################
####################################################################

#####
## SI Table 1
#####

## B.2, Assessing Information Updating Assumptions
## Make indices

t <- which(Data$treatment == 1)
t30 <- which(Data$treatment == 1 & Data$proref1 < 30)
t28 <- which(Data$treatment == 1 & Data$proref1 < 28)

b21 <- lm(proref_st ~ proref1, data = Data[t,])
b22 <- lm(proref_st ~ proref1, data = Data[t30,])
b23 <- lm(proref_st ~ proref1, data = Data[t28,])

stargazer(b21, b22, b23, single.row = FALSE,
          keep.stat = c("n", "rsq"))




##### 
## SI Figure 1
#####

## Attitudes Histrograms Broken into Constituent Questions and separated
## by village

## Construct a ggplot dataframe for separate attitude indices, baseline 1, 2, and endline

makehistdf <- function(data, rowind= c(1:nrow(data))){
  data <- data[rowind,]
  
  df <- data.frame(Count0 = c(table(factor(data$noprob1, levels = 1:5)), 
                              table(factor(data$fit1, levels = 1:5)),
                              table(factor(data$burden1, levels = 1:5)),
                              table(factor(data$values1, levels = 1:5)),
                              table(factor(data$land1, levels = 1:5)),
                              table(factor(data$threat1, levels = 1:5))),
                   Percent0 = c(table(factor(data$noprob1, levels = 1:5))/nrow(data),
                                table(factor(data$fit1, levels = 1:5))/nrow(data),
                                table(factor(data$burden1, levels  = 1:5))/nrow(data),
                                table(factor(data$values1, levels = 1:5))/nrow(data),
                                table(factor(data$land1, levels = 1:5))/nrow(data),
                                table(factor(data$threat1, levels = 1:5))/nrow(data)),
                   Count1 = c(table(factor(data$noprob2, levels = 1:5)),
                              table(factor(data$fit2, levels = 1:5)),
                              table(factor(data$burden2, levels = 1:5)),
                              table(factor(data$values2, levels = 1:5)),
                              table(factor(data$land2, levels = 1:5)),
                              table(factor(data$threat2, levels = 1:5))),
                   Percent1 = c(table(factor(data$noprob2, levels = 1:5))/nrow(data),
                                table(factor(data$fit2, levels = 1:5))/nrow(data),
                                table(factor(data$burden2, levels = 1:5))/nrow(data),
                                table(factor(data$values2, levels = 1:5))/nrow(data),
                                table(factor(data$land2, levels = 1:5))/nrow(data),
                                table(factor(data$threat2, levels = 1:5))/nrow(data)),
                   Count2 = c(table(factor(data$noprob_e, levels = 1:5)),
                              table(factor(data$fit_e, levels = 1:5)),
                              table(factor(data$burden_e, levels = 1:5)),
                              table(factor(data$values_e, levels = 1:5)),
                              table(factor(data$land_e, levels = 1:5)),
                              table(factor(data$threat_e, levels = 1:5))),
                   Percent2 = c(table(factor(data$noprob_e, levels = 1:5))/nrow(data),
                                table(factor(data$fit_e, levels = 1:5))/nrow(data),
                                table(factor(data$burden_e, levels = 1:5))/nrow(data),
                                table(factor(data$values_e, levels = 1:5))/nrow(data),
                                table(factor(data$land_e, levels = 1:5))/nrow(data),
                                table(factor(data$threat_e, levels = 1:5))/nrow(data)),
                   Question = c(rep("NoProb", 5),
                                rep("Fit", 5),
                                rep("Burden", 5),
                                rep("Values", 5),
                                rep("Land", 5),
                                rep("Threat", 5)),
                   Score = c(1:5,
                             1:5,
                             1:5,
                             1:5,
                             1:5,
                             1:5))
  out <- df
  return(out)
  
}

## Dataframes in correct format to use ggplot to build histograms of attitudes
## This set is for the treated only

df1t <- makehistdf(v1, t1)
df2t <- makehistdf(v2, t2)
df3t <- makehistdf(v3, t3)
df4t <- makehistdf(v4, t4)


atthist0 <- function(ggdf, title = "Baseline Attitudes"){ 
  
  p <- ggplot(data=ggdf, aes_string(x = "Score", y= "Percent0", fill = "Question"))+ #using string lets feed yvar
    geom_bar(position = "dodge", stat="identity")+
    scale_fill_brewer(palette = "Accent")+
    ylim(0,1)+
    labs(title = title, y="")+
    theme_minimal()
  #  theme(legend.position = "none")
  out <- p
  return(out)
}

## Plots for baseline, treated:
## Save as V#_atts_b1.pdf, 4 by 7

## SI Figure 1, 4 parts:
atthist0(df1t, title = "V1 Baseline")
atthist0(df2t, title = "V2 Baseline")
atthist0(df3t, title = "V3 Baseline")
atthist0(df4t, title = "V4 Baseline")

#####
## SI Figure 2
#####


atthist1 <- function(ggdf, title = ""){ 
  
  p <- ggplot(data=ggdf, aes_string(x = "Score", y= "Percent1", fill = "Question"))+ #using string lets feed yvar
    geom_bar(position = "dodge", stat="identity")+
    scale_fill_brewer(palette = "Accent")+
    ylim(0,1)+
    labs(title = title, y="")+
    theme_minimal()
  #  theme(legend.position = "none")
  out <- p
  return(out)
}

## Plots for baseline 2, treated:
## Save as V#_atts_b2.pdf, 4 by 7

## SI Figure 2, 4 parts

atthist1(df1t, title = "V1 Baseline 2")
atthist1(df2t, title = "V2 Baseline 2")
atthist1(df3t, title = "V3 Baseline 2")
atthist1(df4t, title = "V4 Baseline 2")


#####
## SI Figure 3
#####

atthist2 <- function(ggdf, title = ""){ 
  
  p <- ggplot(data=ggdf, aes_string(x = "Score", y= "Percent2", fill = "Question"))+ #using string lets feed yvar
    geom_bar(position = "dodge", stat="identity")+
    scale_fill_brewer(palette = "Accent")+
    ylim(0,1)+
    labs(title = title, y="")+
    theme_minimal()
  #  theme(legend.position = "none")
  out <- p
  return(out)
}

## Plots for endline, treated:
## Save as V#_atts_e.pdf, 4 by 7


## SI Figure 3, 4 parts
atthist2(df1t, title = "V1 Endline")
atthist2(df2t, title = "V2 Endline")
atthist2(df3t, title = "V3 Endline")
atthist2(df4t, title = "V4 Endline")


rm(df1t, df2t, df3t, df4t)
rm(atthist0, atthist1, atthist2)
rm(makehistdf)




####
## SI Table 2
#####


## Table of response to treatment broken down by treatment condition and village

makeatabletrtatts <- function(datalist, datalistnames){
  
  #Make an empty matrix with a col for each dataset, row for each variable
  outmat <- matrix(data = NA, nrow = 13, ncol = length(datalist))
  
  for (i in 1:length(datalist)){
    thedata <- datalist[[i]]
    outmat[1,i] <- mean(thedata$village_number)
    outmat[2,i] <- mean(thedata$proref1)
    outmat[3,i] <- mean(thedata$proref2[thedata$treatment == 1])
    outmat[4,i] <- mean(thedata$proref_e[thedata$in_el==1])
    outmat[5,i] <- mean(thedata$proref_st[thedata$treatment == 1])
    outmat[6,i] <- mean(thedata$proref_lt[thedata$in_el == 1])
    outmat[7,i] <- sum(thedata$proref_st[thedata$treatment == 1] == 0) / length(thedata$proref_st[thedata$treatment == 1])
    outmat[8,i] <- sum(thedata$proref_st[thedata$treatment == 1] > 0) / length(thedata$proref_st[thedata$treatment == 1])
    outmat[9,i] <- sum(thedata$proref_st[thedata$treatment == 1] < 0) / length(thedata$proref_st[thedata$treatment == 1])
    outmat[10,i] <- sum(thedata$proref_lt[thedata$in_el == 1] == 0) / length(thedata$proref_lt[thedata$in_el == 1])
    outmat[11,i] <- sum(thedata$proref_lt[thedata$in_el == 1] > 0) / length(thedata$proref_lt[thedata$in_el == 1])
    outmat[12,i] <- sum(thedata$proref_lt[thedata$in_el == 1] < 0) / length(thedata$proref_lt[thedata$in_el == 1])
    outmat[13,i] <- nrow(thedata)
  }
  
  colnames(outmat) <- datalistnames
  rownames(outmat) <- c("Vlg Number", "Pro-ref score, bl", "Pro-ref score, bl2", 
                        "Pro-ref score, el", "Short-term change", 
                        "Long-term change", "Prop s.t. change 0", 
                        "Prop s.t. change > 0", "Prop s.t. change < 0",
                        "Prop l.t. change 0", "Prop l.t. change > 0", 
                        "Prop l.t. change < 0", "n")
  
  dig <- matrix(2, ncol = length(datalist) + 1, nrow = 13)
  dig[1:6,] <- 1
  dig[13,] <- 0
  
  
  return(xtable(outmat, digits = dig))
  
}

## Table broken down further by treated and control within each village:

## SI Table 2:
makeatabletrtatts(datalist = list(v1[t1,], v1[c1,], v2[t2,], v2[c2,], v3[t3,], v3[c3,], v4[t4,], v4[c4,], Data[t,], Data[c,]), 
                  datalistnames = c("V1 T", "V1 C", "V2 T", "V2 C", "V3 T", "V3 C", "V4 T", "V4 C", "All T", "All C"))

rm(makeatabletrtatts)


#####
## SI Table 3
#####

## Average Treatment Effect and Early Spillovers

## Seems as though spillovers could have affected even baseline scores once
## treated started rolling out.  Let's take the ATE by comparing 
## treated randomly selected to be on day 1 of the treatment wave and the control.

## Identify the first day of the treatment wave in each village

firsttrtv1 <- min(Data$today[Data$village_number == 1 & Data$treatment == 1])
firsttrtv2 <- min(Data$today[Data$village_number == 2 & Data$treatment == 1])
## V2 had first treated on first day of enumeration by accident, only control followed
firsttrtv2b <- sort(unique(Data$today[Data$village_number == 2 & Data$treatment == 1], decreasing = FALSE))[2]
## Start of treatment WAVE in v2
firsttrtv3 <- min(Data$today[Data$village_number == 3 & Data$treatment == 1])
firsttrtv4 <- min(Data$today[Data$village_number == 4 & Data$treatment == 1])


## Now make treated day 1 indicators for each village

t1day1 <- which(v1$treatment == 1 & v1$today == firsttrtv1)
t2day1 <- which(v2$treatment == 1 & v2$today %in% c(firsttrtv2, firsttrtv2b))
t3day1 <- which(v3$treatment == 1 & v3$today == firsttrtv3)
t4day1 <- which(v4$treatment == 1 & v4$today == firsttrtv4)

## And make day 1 indicator in the full dataset for pooled calculations

## make index for the treated in the full dataset 

tday1 <- which((Data$treatment == 1 & Data$village_number == 1 & Data$today == firsttrtv1)|
                 (Data$treatment == 1 & Data$village_number == 2 & Data$today %in% c(firsttrtv2, firsttrtv2b))|
                 (Data$treatment == 1 & Data$village_number == 3 & Data$today == firsttrtv3)|
                 (Data$treatment == 1 & Data$village_number == 4 & Data$today == firsttrtv4) )


## Make sure treatment and control indicators are up to date:

c1 <- which(v1$treatment == 0)
c2 <- which(v2$treatment == 0)
c3 <- which(v3$treatment == 0)
c4 <- which(v4$treatment == 0)

c <- which(Data$treatment == 0)


## Now make a table that shows the imbalance between treated and control,
## but better balance between treated on Day 1 and control

tday1vc <- c(t.test(v1$proref1[t1day1], v1$proref1[c1])$p.value,
             t.test(v2$proref1[t2day1], v2$proref1[c2])$p.value,
             t.test(v3$proref1[t3day1], v3$proref1[c3])$p.value,
             t.test(v4$proref1[t4day1], v4$proref1[c4])$p.value,
             t.test(Data$proref1[tday1], Data$proref1[c])$p.value)

tvc <- c(t.test(v1$proref1[t1], v1$proref1[c1])$p.value,
         t.test(v2$proref1[t2], v2$proref1[c2])$p.value,
         t.test(v3$proref1[t3], v3$proref1[c3])$p.value,
         t.test(v4$proref1[t4], v4$proref1[c4])$p.value,
         t.test(Data$proref1[t], Data$proref1[c])$p.value)    

tproref1 <- c(mean(v1$proref1[t1]),
              mean(v2$proref1[t2]),
              mean(v3$proref1[t3]),
              mean(v4$proref1[t4]),
              mean(Data$proref1[t]))

cproref1 <- c(mean(v1$proref1[c1]),
              mean(v2$proref1[c2]),
              mean(v3$proref1[c3]),
              mean(v4$proref1[c4]),
              mean(Data$proref1[c]))

tday1proref1 <- c(mean(v1$proref1[t1day1]),
                  mean(v2$proref1[t2day1]),
                  mean(v3$proref1[t3day1]),
                  mean(v4$proref1[t4day1]),
                  mean(Data$proref1[tday1]))

balance <- cbind(cproref1,
                 tproref1,
                 tvc,
                 cproref1,
                 tday1proref1,
                 tday1vc
)

rownames(balance) <- c("V1", "V2", "V3", "V4", "Pooled")

colnames(balance) <- c("Control", "Treated", "p-val", "Control", "Treated Day 1", "p-val")

digits <- matrix(2, nrow = 5, ncol = 7)
digits[,c(4,7)] <- 3

## SI Table 3:
xtable(balance, digits = digits)


##### 
## SI Table 4
#####



naive <- c(mean(v1$proref2[t1]) - mean(v1$proref1[c1]),
           mean(v2$proref2[t2]) - mean(v2$proref1[c2]),
           mean(v3$proref2[t3]) - mean(v3$proref1[c3]),
           mean(v4$proref2[t4]) - mean(v4$proref1[c4]),
           mean(Data$proref2[t]) - mean(Data$proref1[c]))

naive_p <- c(t.test(v1$proref2[t1], v1$proref1[c1])$p.value,
             t.test(v2$proref2[t2], v2$proref1[c1])$p.value,
             t.test(v3$proref2[t3], v3$proref1[c1])$p.value,
             t.test(v4$proref2[t4], v4$proref1[c1])$p.value,
             t.test(Data$proref2[t], Data$proref1[c])$p.value)

fixed <-  c(mean(v1$proref2[t1day1]) - mean(v1$proref1[c1]),
            mean(v2$proref2[t2day1]) - mean(v2$proref1[c2]),
            mean(v3$proref2[t3day1]) - mean(v3$proref1[c3]),
            mean(v4$proref2[t4day1]) - mean(v4$proref1[c4]),
            mean(Data$proref2[tday1]) - mean(Data$proref1[c]))

fixed_p <- c(t.test(v1$proref2[t1day1], v1$proref1[c1])$p.value,
             t.test(v2$proref2[t2day1], v2$proref1[c1])$p.value,
             t.test(v3$proref2[t3day1], v3$proref1[c1])$p.value,
             t.test(v4$proref2[t4day1], v4$proref1[c1])$p.value,
             t.test(Data$proref2[tday1], Data$proref1[c])$p.value)

ncont <- c(length(c1),
           length(c2),
           length(c3), 
           length(c4),
           length(c))

ntrt <- c(length(t1),
          length(t2),
          length(t3), 
          length(t4),
          length(t))

ntrtday1 <- c(length(t1day1),
              length(t2day1),
              length(t3day1), 
              length(t4day1),
              length(tday1))

## Make a table of ATEs calculated in the naive and corrected way

ates <- rbind(naive,
              naive_p,
              fixed,
              fixed_p,
              ncont,
              ntrt,
              ntrtday1)

rownames(ates) <- c("Naive ATE", "p-value", "Corrected ATE", "p-value", 
                    "n cont", "n trt", "n trt day 1")

colnames(ates) <- c("V1", "V2", "V3", "V4", "Pooled")

ates <- as.data.frame(ates)

digits <- matrix(data = 2, nrow = 7, ncol = 6)
digits[5:7,] <- 0
digits[c(2,4),] <- 3

## SI Table 4
xtable(ates, digits = digits)




#####
## SI Table 5
#####

## Explore attrition

## First, make an indicator for the attritted and the rest of the village and compare

atrit1 <- which(v1$in_el == 0)
atrit2 <- which(v2$in_el == 0)
atrit3 <- which(v3$in_el == 0)
atrit4 <- which(v4$in_el == 0)

## Compare attritted and rest within each village

datalist <- list(v1[atrit1,], v1[-atrit1,], v2[atrit2,], v2[-atrit2,], 
                 v3[atrit3,], v3[-atrit3,], v4[atrit4,], v4[-atrit4,])
datalistnames <- c("V1 At", "V1 Stayed", "V2 At", "V2 Stayed",
                   "V3 At", "V3 Stayed", "V4 At", "V4 Stayed")

attritmat <- matrix(data = NA, nrow = 15, ncol = length(datalist))


for (i in 1:length(datalist)){
  thedata <- datalist[[i]]
  attritmat[1,i] <- nrow(thedata)
  attritmat[2,i] <- round(mean(thedata$resp_age), 0)
  attritmat[3,i] <- round(mean(thedata$religion == "Protestant"),2)
  attritmat[4,i] <- round(mean(thedata$religion == "Catholic"),2)
  attritmat[5,i] <- round(mean(thedata$religion == "Muslim"),2)
  attritmat[6,i] <- round(mean(thedata$farmer == 1),2)
  attritmat[7,i] <- round(mean(thedata$resp_occupation == "Trader"),2)
  attritmat[8,i] <- mean(thedata$resp_educ_coarse == "None")
  attritmat[9,i] <- mean(thedata$resp_educ_coarse == "SomePrimary")
  attritmat[10,i] <- mean(thedata$resp_educ_coarse == "SomeSecondary")
  attritmat[11,i] <- mean(thedata$resp_educ_coarse == "SomeCollege")
  attritmat[12,i] <- round(mean(thedata$years_live == "More than 5 years" ),2)
  attritmat[13,i] <- mean(thedata$proref1)
  attritmat[14,i] <- mean(thedata$proref2, na.rm=T)
  attritmat[15,i] <- mean(thedata$treatment)
  rm(thedata)
}

colnames(attritmat) <- datalistnames
rownames(attritmat) <- c("n", "Age", "Protestant", "Catholic", "Muslim", 
                         "Farmer", "Trader", "No Educ", "Primary Educ", "Secondary Educ", "College Educ", "Lived > 5yrs",
                         "Baseline 1 Proref","Baseline 2 Proref", "Treated")

digits <- matrix(data = 2, nrow = 15, ncol = 9)
digits[c(1,2),] <- 0
digits[c(13,14),] <- 1

## SI Table 5
xtable(attritmat, digits = digits, align = c("r|rr|rr|rr|rr"))

## Also compare attritted and rest in terms of network attributes

#####
## SI Table 6
#####

## We do pooled analyses, so the harder test for the null that the attritted are
## not menaingfully different from the rest is the one that pools the four villages:

atrit <- which(Data$in_el == 0)

datalist <- list(Data[atrit,], Data[-atrit,])
datalistnames <- c("Attritted", "Stayed")

attritmat3 <- matrix(data = NA, nrow = 8, ncol = length(datalist))


for (i in 1:length(datalist)){
  thedata <- datalist[[i]]
  attritmat3[1,i] <- nrow(thedata)
  attritmat3[2,i] <- mean(thedata$someneighbtreat)
  attritmat3[3,i] <- mean(thedata$neighbsize)
  attritmat3[4,i] <- mean(thedata$neighbproref, na.rm=T)
  attritmat3[5,i] <- mean(thedata$disttomostpos, na.rm=T)
  attritmat3[6,i] <- mean(thedata$disttomostneg, na.rm=T)
  attritmat3[7,i] <- mean(thedata$disttopersuaded, na.rm=T)
  attritmat3[8,i] <- mean(thedata$disttocranky2, na.rm=T)
  rm(thedata)
}



colnames(attritmat3) <- datalistnames
rownames(attritmat3) <- c("n", "Treated Neighbs", "# Neighbs", "Neighbs Bl Atts", 
                          "Dist to Warmest", "Dist to Coldest", "Dist to Persuaded", 
                          "Dist to Backlashed")


pvals <- c(NA,t.test(Data$someneighbtreat[atrit], Data$someneighbtreat[-atrit])$p.value,
           t.test(Data$neighbsize[atrit], Data$neighbsize[-atrit])$p.value,
           t.test(Data$neighbproref[atrit], Data$neighbproref[-atrit])$p.value,
           t.test(Data$disttomostpos[atrit], Data$disttomostpos[-atrit])$p.value,
           t.test(Data$disttomostneg[atrit], Data$disttomostneg[-atrit])$p.value,
           t.test(Data$disttopersuaded[atrit], Data$disttopersuaded[-atrit])$p.value,
           t.test(Data$disttocranky2[atrit], Data$disttocranky2[-atrit])$p.value)

attritmat3 <- cbind(attritmat3, "p-vals" = pvals)



digits <- matrix(data = 1, nrow = 8, ncol = 4)
digits[1,] <- 0
digits[,4] <- 2


## Table comparing attritted and stayed for four villages pooled,
## SI Table 6
xtable(attritmat3, digits = digits)


#####
## SI Table 7
#####



## Network Difference Analyses

## Simulate a level shift for everyone equal to the mean of their village's 
## increase, respecting the cap of the index.

levelshift <- function(nwlist, nwlistnames, shiftvec){#the proref_e attribute will be replaced by 
  #a level shift to proref1 the size of shift for everyone.
  
  outmat <- matrix(data = NA, nrow = 3, ncol = length(nwlist)) #Create an empty matrix
  
  for (i in 1:length(nwlist)){
    
    thenw <- nwlist[[i]] #Grab a network from the list
    
    V(thenw)$simproref_e <- V(thenw)$proref1 + shiftvec[i] #Create a simulated endline score in the network
    overcaps <- which(V(thenw)$simproref_e > 30) #ID the simulated endline scores that go above the index ceiling
    V(thenw)$simproref_e[overcaps] <- 30 #Replace these with the cap of 30
    
    ## Now calculate the network similarity scores assuming this is the endline score
    
    absdifavg1 <- c()
    absdifavg_e <- c()
    uniqueneighbsize <- c()
    
    for (j in 1:vcount(thenw)){
      # All neighbors:
      neighbors <- neighbors(thenw, v = j, mode = "all") #neighbors does not include ego, does include in and out
      uniqueneighb <- unique(neighbors)
      uniqueneighbsize[j] <- length(uniqueneighb) #number of network neighbors
      absdifs <-  abs(V(thenw)$proref1[j] - V(thenw)$proref1[uniqueneighb])
      absdifavg1[j] <- ifelse(uniqueneighbsize[j] != 0, sum(absdifs)/uniqueneighbsize[j], NA)
      absdifs_e <- abs(V(thenw)$simproref_e[j] - V(thenw)$simproref_e[uniqueneighb]) #replaced w/ simulated endline
      eldenom <- sum(!is.na(absdifs_e))
      absdifavg_e[j] <- ifelse(eldenom != 0, sum(absdifs_e, na.rm=T)/eldenom, NA)
    }
    
    clust <- cbind(V(thenw)$hhnum, uniqueneighbsize,  absdifavg1, absdifavg_e)
    clust <- as.data.frame(clust)
    
    ## Now fill out the matrix with aggregates of these similarity measures
    
    outmat[1,i] <- i
    outmat[2,i] <- mean(clust$absdifavg1, na.rm=T)
    outmat[3,i] <- mean(clust$absdifavg_e, na.rm=T)
    rm(clust)
    rm(thenw)
  }
  
  colnames(outmat) <- nwlistnames
  
  rownames(outmat) <- c("Vlg Number", "AbsDifAvg1", "AbsDifAvg_e")
  
  return(outmat)
  
  
}

## Calculate the level shifts in each village
level1 <- mean(v1$proref_lt, na.rm=T)
level2 <- mean(v2$proref_lt, na.rm=T)
level3 <- mean(v3$proref_lt, na.rm=T)
level4 <- mean(v4$proref_lt, na.rm=T)


shiftvec <- c(level1, level2, level3, level4) #vector of level shifts for the 4 villages


test1 <- levelshift(nwlist = list(g1, g2, g3, g4),
                    nwlistnames = c("V1 Union", "V2 Union", "V3 Union", "V4 Union"),
                    shiftvec = shiftvec)

## SI Table 7
xtable(test1, align = "lrrrr", digits = 2) 

rm(test1, shiftvec, levelshift)
rm(level1, level2, level3, level4)

#####
## SI Figure 4
#####

## For this figure, we'll use the plyr library.  (Don't load earlier,
## masks a function needed above.)

library(plyr)

## Finally, let's generate the sampling distribution network similarity
## given randomly reshuffled pairs of baseline and endline. We need to know
## the range of possible differences for our observed network when the baseline
## and endline scores we observed are thrown down at random.  This standard
## deviation will let us quantify the size of our observed shift.  

shufflepairs <- function(nw, nsims = 1000){#assumes has proref1 and proref_e and treatment as attribute of nw
  
  avgdifs1 <- c()
  avgdifs_e <- c()
  theNAs1 <- c()
  theNAs_e <- c()
  avgdifstrt1 <- c() #separately store info for subset of treated nodes
  avgdifscnt1 <- c()
  avgdifstrt_e <- c() #seprately store info for subset of control nodes
  avgdifscnt_e <- c()
  
  for (i in 1:nsims){
    
    thenw <- nw
    neworder <- sample(1:vcount(nw), size = vcount(nw), replace = FALSE) #shuffle index 
    V(thenw)$simproref1 <- V(nw)$proref1[neworder]
    V(thenw)$simproref_e <- V(nw)$proref_e[neworder]
    
    ## make an index for which nodes are the treated ones
    ## Note: treatment status isn't randomly reassigned, it's a fixed attribute IDing a subset of nodes
    
    trt <- which(V(thenw)$treatment == 1)
    cnt <- which(V(thenw)$treatment == 0)
    
    
    absdifavg1 <- c()
    absdifavg_e <- c() #make a vector to store node-level avg absolute neighborhood differences
    
    
    for (j in 1:vcount(thenw)){
      # All neighbors:
      
      neighbors <- neighbors(thenw, v = j, mode = "all") #neighbors does not include ego, does include in and out
      uniqueneighb <- unique(neighbors)
      absdifs1 <- abs(V(thenw)$simproref1[j] - V(thenw)$simproref1[uniqueneighb])
      absdifstot1 <- sum(absdifs1) #add up absolute differences
      absdifavg1[j] <- ifelse(length(uniqueneighb) != 0, absdifstot1/length(uniqueneighb), NA) #store average
      
      absdifs_e <- abs(V(thenw)$simproref_e[j] - V(thenw)$simproref_e[uniqueneighb]) 
      absdifstot_e <- sum(absdifs_e, na.rm=T) 
      eldenom <- sum(!is.na(absdifs_e))
      absdifavg_e[j] <- ifelse(eldenom != 0, absdifstot_e/eldenom, NA)
      
    }
    
    avgdifs1[i] <- mean(absdifavg1, na.rm=T)
    avgdifs_e[i] <- mean(absdifavg_e, na.rm=T)
    theNAs1[i] <- sum(is.na(absdifavg1))
    theNAs_e[i] <- sum(is.na(absdifavg_e))
    avgdifstrt1[i] <- mean(absdifavg1[trt], na.rm=T)
    avgdifstrt_e[i] <- mean(absdifavg_e[trt], na.rm=T)
    avgdifscnt1[i] <- mean(absdifavg1[cnt], na.rm=T)
    avgdifscnt_e[i] <- mean(absdifavg_e[cnt], na.rm=T)
    
    if(i %% 20 == 0){
      print(i)
    }
    
  }
  
  out <- cbind(avgdifs1, avgdifs_e, theNAs1, theNAs_e, avgdifstrt1, avgdifstrt_e, avgdifscnt1, avgdifscnt_e)
  out <- as.data.frame(out)
  
  return(out)
  
}


test <- shufflepairs(g1, nsims = 20)

## Set the seed:
set.seed(54321)

shufp1 <- shufflepairs(g1, nsims = 1000)
shufp2 <- shufflepairs(g2, nsims = 1000)
shufp3 <- shufflepairs(g3, nsims = 1000)
shufp4 <- shufflepairs(g4, nsims = 1000)


## And quantify the change in network difference scores for evereyone,
## the treated, and the control in each village

sum(shufp1$avgdifs1 - shufp1$avgdifs_e >= (5.48-4.44)) 
sum(shufp2$avgdifs1 - shufp2$avgdifs_e >= (4.58-3.68)) 
sum(shufp3$avgdifs1 - shufp3$avgdifs_e >= (5.92-5.05)) 
sum(shufp4$avgdifs1 - shufp4$avgdifs_e >= (4.75-4.64)) 

sum(shufp1$avgdifstrt1 - shufp1$avgdifstrt_e >= (5.12-5.00)) 
sum(shufp2$avgdifstrt1 - shufp2$avgdifstrt_e >= (4.54-4.07)) 
sum(shufp3$avgdifstrt1 - shufp3$avgdifstrt_e >= (5.55-5.06)) 
sum(shufp4$avgdifstrt1 - shufp4$avgdifstrt_e >= (4.93-4.68)) 

## Implied p-values for the control:

sum(shufp1$avgdifscnt1 - shufp1$avgdifscnt_e >= (5.79-3.95)) /1000
sum(shufp2$avgdifscnt1 - shufp2$avgdifscnt_e >= (4.62-3.27)) /1000
sum(shufp3$avgdifscnt1 - shufp3$avgdifscnt_e >= (6.50-5.04)) /1000
sum(shufp4$avgdifscnt1 - shufp4$avgdifscnt_e >= (4.46-4.56))  /1000

## Let's make this plot in ggplot

simdata <- cbind(c(shufp1$avgdifs1, shufp2$avgdifs1, shufp3$avgdifs1, shufp4$avgdifs1),
                 c(shufp1$avgdifs_e, shufp2$avgdifs_e, shufp3$avgdifs_e, shufp4$avgdifs_e),
                 c(rep(1, nrow(shufp1)),
                   rep(2, nrow(shufp2)),
                   rep(3, nrow(shufp3)),
                   rep(4, nrow(shufp4)))
)

simdata <- as.data.frame(simdata)
names(simdata) <- c("Difs1", "Difs_e", "Vlg")
simdata$Change <- simdata$Difs1 - simdata$Difs_e

## Store the true values for our reference (from paper Table 4)
dif1_1 <- 5.48 
dif1_2 <- 4.58
dif1_3 <- 5.92
dif1_4 <- 4.75

dif_e_1 <- 4.44
dif_e_2 <- 3.68
dif_e_3 <- 5.05
dif_e_4 <- 4.64

dift1_1 <- 5.12 
dift1_2 <- 4.54
dift1_3 <- 5.55
dift1_4 <- 4.93

dift_e_1 <- 5.00
dift_e_2 <- 4.07
dift_e_3 <- 5.06
dift_e_4 <- 4.68

difc1_1 <- 5.79 
difc1_2 <- 4.62
difc1_3 <- 6.50
difc1_4 <- 4.46

difc_e_1 <- 3.95
difc_e_2 <- 3.27
difc_e_3 <- 5.04
difc_e_4 <- 4.56

simdata$dif1 <- c(rep(dif1_1, nrow(shufp1)),
                  rep(dif1_2, nrow(shufp2)),
                  rep(dif1_3, nrow(shufp3)),
                  rep(dif1_4, nrow(shufp4)))

simdata$dif_e <- c(rep(dif_e_1, nrow(shufp1)),
                   rep(dif_e_2, nrow(shufp2)),
                   rep(dif_e_3, nrow(shufp3)),
                   rep(dif_e_4, nrow(shufp4)))

simdata$dift1 <- c(rep(dift1_1, nrow(shufp1)),
                   rep(dift1_2, nrow(shufp2)),
                   rep(dift1_3, nrow(shufp3)),
                   rep(dift1_4, nrow(shufp4)))

simdata$dift_e <- c(rep(dift_e_1, nrow(shufp1)),
                    rep(dift_e_2, nrow(shufp2)),
                    rep(dift_e_3, nrow(shufp3)),
                    rep(dift_e_4, nrow(shufp4)))

simdata$difc1 <- c(rep(difc1_1, nrow(shufp1)),
                   rep(difc1_2, nrow(shufp2)),
                   rep(difc1_3, nrow(shufp3)),
                   rep(difc1_4, nrow(shufp4)))

simdata$difc_e <- c(rep(difc_e_1, nrow(shufp1)),
                    rep(difc_e_2, nrow(shufp2)),
                    rep(difc_e_3, nrow(shufp3)),
                    rep(difc_e_4, nrow(shufp4)))

## Add a column of village numbers as characters to use in the facet wrap

simdata$VlgNum <- NA

simdata$VlgNum[simdata$Vlg == 1] <- "Village 1"
simdata$VlgNum[simdata$Vlg == 2] <- "Village 2"
simdata$VlgNum[simdata$Vlg == 3] <- "Village 3"
simdata$VlgNum[simdata$Vlg == 4] <- "Village 4"


mu <- ddply(simdata, "VlgNum", summarise, grp.dif=mean(dif1 - dif_e))
mu

mut <- ddply(simdata, "VlgNum", summarise, grp.dift=mean(dift1 - dift_e))
mut

muc <- ddply(simdata, "VlgNum", summarise, grp.difc=mean(difc1 - difc_e))
muc

colors <- c("Pooled" = "blue", "Treated" = "hotpink", "Control" = "seagreen")



## Save as sampdist_t_v_c.pdf 6x7
## SI Figure 4

p <- ggplot(data = simdata, mapping = aes(x = Change))
p + geom_density()+
  facet_wrap(~VlgNum)+
  geom_vline(data = mu, aes(xintercept=grp.dif, color = "Pooled"), linetype="dashed", size=.5) +
  geom_vline(data = mut, aes(xintercept=grp.dift, color = "Treated"), linetype="dashed", size=.5) +
  geom_vline(data = muc, aes(xintercept=grp.difc, color = "Control"), linetype="dashed", size=.5) +
  labs(x = "Change in Network Similarity Score", 
       title = "Sampling Distribution of Change in Network Similarity Scores by Village",
       color = "Observed \nAverage \nChange for:")+
  scale_color_manual(values = colors) +
  theme_minimal()


#####
## SI Figure 5
#####


## And zooming in on Village 4 and the possibility of a difference between
## respondents who took the endline on schedule and those who took it later

simdata_v4 <- cbind(shufp4$avgdifs1, shufp4$avgdifs_e, rep(4, nrow(shufp4)))
simdata_v4 <- as.data.frame(simdata_v4)
names(simdata_v4) <- c("Difs1", "Difs_e", "Vlg")

simdata_v4$Change <- simdata_v4$Difs1 - simdata_v4$Difs_e

colors <- c("Pooled, Soon" = "blue", "Pooled, Late" = "darkorchid", "Treated, Soon" = "hotpink", 
            "Treated, Late" = "skyblue", "Control, Soon" = "seagreen","Control, Late" = "orange")

muall <- cbind(c("Later1" = 4.95),
               c("Later_e"= 4.29),
               c("Sooner1" = 4.54),
               c("Sooner_e" = 4.96),
               c("Later1_trt" = 5.1),
               c("Later_e_trt" = 4.4),
               c("Later1_cnt" = 4.7),
               c("Later_e_cnt" = 4.2),
               c("Sooner1_trt" = 4.8),
               c("Sooner_e_trt" = 5.0),
               c("Sooner1_cnt" = 4.2),
               c("Sooner_e_cnt" = 5.8))
muall <- as.data.frame(muall)
names(muall) <- c("Later1", "Later_e",
                  "Sooner1", "Sooner_e",
                  "Later1_trt", "Later_e_trt",
                  "Later1_cnt", "Later_e_cnt",
                  "Sooner1_trt", "Sooner_e_trt",
                  "Sooner1_cnt", "Sooner_e_cnt")

muall$PoolLate <- muall$Later1-muall$Later_e
muall$PoolSoon <- muall$Sooner1 - muall$Sooner_e
muall$TrtLate <- muall$Later1_trt - muall$Later_e_trt
muall$TrtSoon <- muall$Sooner1_trt - muall$Sooner_e_trt
muall$CntLate <- muall$Later1_cnt - muall$Later_e_cnt
muall$CntSoon <- muall$Sooner1_cnt - muall$Sooner_e_trt

## SI Figure 5
## Save as sampdist_v4_soon_v_late.pdf, 6x7
p <- ggplot(data = simdata_v4, mapping = aes(x = Change))
p+geom_density()+
  geom_vline(data = muall, aes(xintercept=PoolSoon, color = "Pooled, Soon"), linetype="dashed", size=.5) +
  geom_vline(data = muall, aes(xintercept=PoolLate, color = "Pooled, Late"), linetype="dashed", size=.5) +
  geom_vline(data = muall, aes(xintercept=TrtSoon, color = "Treated, Soon"), linetype="dashed", size=.5) +
  geom_vline(data = muall, aes(xintercept=TrtLate, color = "Treated, Late"), linetype="dashed", size=.5) +
  geom_vline(data = muall, aes(xintercept=CntSoon, color = "Control, Soon"), linetype="dashed", size=.5) +
  geom_vline(data = muall, aes(xintercept=CntLate, color = "Control, Late"), linetype="dashed", size=.5) +
  labs(x = "Change in Network Similarity Score", 
       title = "Sampling Distribution of Change in Network Similarity Scores for V4",
       subtitle = "Accounting for the Delayed Round of Endline Measurement",
       color = "Observed \nAverage \nChange for:")+
  scale_color_manual(values = colors) +
  theme_minimal()



#####
## SI Table 8
#####

## Network Distance Measures
## Distribution of distances to warmest, coldest, persuaded, and backlashed
## Information for Table 8


table(Data$disttomostpos)
table(Data$disttomostneg)
table(Data$disttopersuaded)
table(Data$disttocranky2)



#####
## SI Table 9
#####


## Need to remake village datasets so they include all variables we have made
## since the last build

v1 <- Data[which(Data$village_number == 1),]
v2 <- Data[which(Data$village_number == 2),]
v3 <- Data[which(Data$village_number == 3),]
v4 <- Data[which(Data$village_number == 4),]


## Add table of descriptive statistics of variables in the regressions
## separated by village

datalist <- list(v1, v2, v3, v4, Data)
datalistnames <- c("Vlg 1", "Vlg 2", "Vlg 3", "Vlg 4", "All")
outmat <- matrix(data = NA, nrow = 15, ncol = length(datalist))

for (i in 1:length(datalist)){
  thedata <- datalist[[i]]
  outmat[1,i] <- mean(thedata$treatment)
  outmat[2,i] <- mean(thedata$neighbtreat, na.rm=T)
  outmat[3,i] <- mean(thedata$someneighbtreat, na.rm=T)
  outmat[4,i] <- mean(thedata$proref1)
  outmat[5,i] <- mean(thedata$neighbproref, na.rm=T)
  outmat[6,i] <- sum(thedata$mostpos == 1, na.rm=T)
  outmat[7,i] <- sum(thedata$mostneg==1, na.rm=T)
  outmat[8,i] <- sum(thedata$mostpersuaded, na.rm=T)
  outmat[9,i] <- sum(thedata$mostcranky, na.rm=T)
  outmat[10,i] <- mean(thedata$disttomostpos, na.rm=T)
  outmat[11,i] <- mean(thedata$disttomostneg, na.rm=T)
  outmat[12,i] <- mean(thedata$disttopersuaded, na.rm=T)
  outmat[13,i] <- mean(thedata$disttocranky2, na.rm=T)
  outmat[14,i] <- nrow(thedata)
  outmat[15,i] <- sum(thedata$in_el == 1)
  rm(thedata)
}

colnames(outmat) <- datalistnames
rownames(outmat) <- c( "Treated", "# Treated Neighbs", "Treated Neighbs", "Baseline Atts", "Neighbs Bl Atts", 
                       "Warmest", "Coldest", "Most Persuaded", "Most Backlash", "Dist to Warmest", 
                       "Dist to Coldest", "Dist to Persuaded", "Dist to Backlashed",
                       "Baseline hhs","Endline hhs")


## SI Table 9
xtable(outmat, align = "lrrrr|r")


#####
## SI Table 10
#####

## Replicating main table using the number of treated neighbors instead of 
## an indicator for the presence of treated neighbors


m_num1 <- lm(proref_e ~ treatment + neighbtreat +treatment*neighbtreat +
               neighbsize + neighbsize*treatment + proref1 + neighbproref +
               neighbproref*treatment + disttomostpos  , data = Data)

m_num2 <- lm(proref_e ~ treatment + neighbtreat +treatment*neighbtreat +
               neighbsize + neighbsize*treatment + proref1 + neighbproref +
               neighbproref*treatment + disttomostneg  , data = Data)

m_num3 <- lm(proref_e ~ treatment + neighbtreat +treatment*neighbtreat +
               neighbsize + neighbsize*treatment + proref1 + neighbproref +
               neighbproref*treatment + disttopersuaded , data = Data)

m_num4 <- lm(proref_e ~ treatment + neighbtreat +treatment*neighbtreat +
               neighbsize + neighbsize*treatment + proref1 + neighbproref +
               neighbproref*treatment + disttocranky2 , data = Data)


m_num5 <- lm(proref_e ~ treatment + neighbtreat +treatment*neighbtreat +
               neighbsize + neighbsize*treatment + proref1 + neighbproref +
               neighbproref*treatment + disttomostpos + 
               disttomostneg + disttopersuaded + disttocranky2 , data = Data)

## SI Table 10
stargazer(m_num1, m_num2, m_num3, m_num4,m_num5,
          keep.stat = c("n", "rsq"),
          single.row = FALSE)


#####
## SI Table 11
#####

## Replicate paper table 5 model 6 adding reference categories as controls

m6plusdistcont <- lm(proref_e ~ treatment + someneighbtreat +treatment*someneighbtreat +
                       neighbsize + neighbsize*treatment + proref1 + neighbproref +
                       neighbproref*treatment +
                       mostpos + mostneg + mostpersuaded + mostcranky + disttomostpos + 
                       disttomostneg + disttopersuaded + disttocranky2 , data = Data)

stargazer(m6plusdistcont, 
          keep.stat = c("n", "rsq"), single.row=TRUE)




#####
## SI Table 12
#####

## Replicate main specification, adding controls

m6_controls <- lm(proref_e ~ treatment + someneighbtreat +treatment*someneighbtreat +
                    neighbsize + neighbsize*treatment + proref1 + neighbproref +
                    neighbproref*treatment + disttomostpos + 
                    disttomostneg + disttopersuaded + disttocranky2 +
                    + resp_age + relig + resp_educ_coarse +
                    farmer + gender + been_refugee + 
                    morethan5, data = Data)


## SI Table 12
stargazer(m6_controls,
          keep.stat = c("n", "rsq"),
          single.row = TRUE)



#####
## SI Table 13
#####

## Simpler analyses of network features to isolate their role


m0a <- lm(proref_e ~ treatment + someneighbtreat + neighbsize + proref1 +
            disttomostpos +disttomostneg +
            disttopersuaded + disttocranky2, data = Data)

m0aa <- lm(proref_e ~ proref1 +
             disttomostpos +disttomostneg +
             disttopersuaded + disttocranky2, data = Data)

m0bb <- lm(proref_e ~ treatment + someneighbtreat + neighbsize + proref1 +
             neighbproref+
             disttomostpos +disttomostneg +
             disttopersuaded + disttocranky2, data = Data)

m0d <- lm(proref_e ~ proref1 + neighbproref, data = Data)

m0e <- lm(proref_e ~ treatment + someneighbtreat + neighbsize  +
            proref1 + neighbproref, data = Data)

## SI Table 13
stargazer(m0d, m0e, m0aa, m0a, m0bb, 
          keep.stat = c("n", "rsq"))





#####
## SI Figure 6
#####

## Plot the baseline views in each village over time with a linear 
## trend line overlaid 

p <- ggplot(data = Data[Data$village_number %in% c(1,2),], mapping = aes(x = today, y = proref1))

p + geom_jitter(width = .15, aes(color = as.character(village_number)))+
  ylim(0,31) +
  geom_smooth(method = "lm")+
  labs(x = "Date", 
       y = "Baseline pro-refugee score",
       title = "Baseline views over time, Villages 1 and 2",
       color = "Village")+
  theme_minimal() 

## save as "baseline_time_v1v2.pdf", 6x6

p <- ggplot(data = Data[Data$village_number %in% c(3,4),], mapping = aes(x = today, y = proref1))

p + geom_jitter(width = .15, aes(color = as.character(village_number)))+
  ylim(0,31) +
  geom_smooth(method = "lm")+
  labs(x = "Date", 
       y = "Baseline pro-refugee score",
       title = "Baseline views over time, Villages 3 and 4",
       color = "Village")+
  theme_minimal() 

## save as "baseline_time_v3v4.pdf", 6x6




#####
## SI Figure 7
#####

## Remake with flexibile smoother, adding a line for start of treatment wave
## These use the indices about the first date of the treatment wave made above

p <- ggplot(data = Data[Data$village_number %in% c(1,2),], mapping = aes(x = today, y = proref1))

p + geom_jitter(width = .15,aes(color = as.character(village_number)))+
  ylim(0,31) +
  geom_smooth(method = "loess", span = .35)+
  geom_vline(xintercept = firsttrtv1, linetype="dashed", size=.5, color = "red")+
  geom_vline(xintercept = firsttrtv2b, linetype="dashed", size=.5, color = "darkcyan")+
  labs(x = "Date", 
       y = "Baseline pro-refugee score",
       title = "Baseline views over time, Villages 1 and 2",
       color = "Village")+
  theme_minimal() 

## save as "baseline_time_v1v2loessTstart.pdf", 6x6

p <- ggplot(data = Data[Data$village_number %in% c(3,4),], mapping = aes(x = today, y = proref1))

p + geom_jitter(width = .15,aes(color = as.character(village_number)))+
  ylim(0,31) +
  geom_smooth(method = "loess", span = .35)+
  geom_vline(xintercept = firsttrtv3, linetype="dashed", size=.5, color = "red")+
  geom_vline(xintercept = firsttrtv4, linetype="dashed", size=.5, color = "darkcyan")+
  labs(x = "Date", 
       y = "Baseline pro-refugee score",
       title = "Baseline views over time, Villages 3 and 4",
       color = "Village")+
  theme_minimal() 

## save as "baseline_time_v3v4loessTstart.pdf", 6x6















